Setup

Benthic

Point cover

Data source: “BenthicPointCoverScaledByTransect.xlsx” (scaled to not include non-reef substrate)

Import + wrangle data

benthic_var <- c("lc", "cca", "ta", "tas", "fma", "cyan", "ainv", "peys")

benthic_cover_trans <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "BenthicPointCoverScaled", "BenthicPointCoverScaledByTransect.xlsx"), sheet = "Data") %>% 
  clean_names() %>%
  mutate(year = year(date),
         trans = as.character(trans), # to bind with NPA data - some trans numbers include surveyor initials
         t_tas = tas + tam) %>% 
  select(site, year, trans, benthic_var) %>%
  mutate(across(benthic_var, ~ . / 100)) %>%
  bind_rows(
    bind_rows(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoverScaledByTransect.xlsx"), sheet = "Overall") %>%
                clean_names() %>%
                mutate(`transect_name` = as.character(`transect_name`)) %>%  # to match NPA with surv initials
                left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoverScaledByTransect.xlsx"), sheet = "tTA") %>%
                            clean_names() %>%
                            select(transect_id, ta, sta, tas, tam)),
              read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "BenthicCoverScaledByTransect.xlsx"), sheet = "Overall") %>%
                clean_names() %>%
                left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "BenthicCoverScaledByTransect.xlsx"), sheet = "tTA") %>%
                            clean_names() %>%
                            select(transect_id, ta, sta, tas, tam))) %>%
      mutate(year = 2025,
             survey_name = if_else(survey_name == "Little Bird Patch", "Little Bird Backreef", survey_name),
             tas_sum = tas + sta + tam) %>%
      select(site = survey_name, year, trans = transect_name, lc = t_coral, cca = t_cca, ta, tas = tas_sum, fma = t_fma, cyan = t_cyan, ainv = t_ainv, peys = t_pey)) %>%
  mutate(site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site), # dealing with NPA sites that have site numbers with different name variations afterwards
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                                    "NEMMA"))) %>%
  filter(year != 2022) %>% # 2022 was incomplete data collection year for NPA
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
  ungroup()

benthic_cover_site <- benthic_cover_trans %>%
  group_by(site, site_cat, year) %>%
  rename_with(~ str_remove(.x, "t_")) %>%
  summarise(across(c(benthic_var),
                   list(mean = ~ mean(.x, na.rm = TRUE),
                        sd   = ~ sd(.x)),
                   .names = "{.col}_{.fn}")) %>%
  ungroup()

benthic_cover_year <- benthic_cover_site %>%
  select(!contains("_se")) %>%
  rename_with(~ str_remove(.x, "_mean")) %>%
  group_by(year, site_cat) %>%
  summarise(across(c(benthic_var),
                   list(mean = ~ mean(.x, na.rm = TRUE),
                        sd   = ~ sd(.x)),
                   .names = "{.col}_{.fn}")) %>%
  ungroup()

write.csv(benthic_cover_year, here("agrra_monitoring", "data_outputs", "benthic_cover_year.csv"))

Line plots

ggplot(benthic_cover_site, aes(x = year, y = lc_mean, group = site, shape = site)) +
  geom_point() +
  scale_shape_manual(values = 0:35) +
  geom_line() +
  scale_y_continuous(labels = label_percent()) +
  labs(x = "Year", y = "Live Coral Cover", color = "Site", group = "Site") +
  theme_minimal()

ggplot(benthic_cover_site, aes(x = year, y = fma_mean, group = site, shape = site)) +
  geom_point() +
  scale_shape_manual(values = 0:35) +
  geom_line() +
  scale_y_continuous(labels = label_percent()) +
  labs(x = "Year", y = "FMA Cover", color = "Site", group = "Site") +
  theme_minimal()

ggplot(benthic_cover_site, aes(x = year, y = ta_mean, group = site, shape = site)) +
  geom_point() +
  scale_shape_manual(values = 0:35) +
  geom_line() +
  scale_y_continuous(labels = label_percent()) +
  labs(x = "Year", y = "TA Cover", color = "Site", group = "Site") +
  theme_minimal()

ggplot(benthic_cover_site, aes(x = year, y = tas_mean, group = site, shape = site)) +
  geom_point() +
  scale_shape_manual(values = 0:35) +
  geom_line() +
  scale_y_continuous(labels = label_percent()) +
  labs(x = "Year", y = "TA/Sediment Cover", color = "Site", group = "Site") +
  theme_minimal()

ggplot(benthic_cover_site, aes(x = year, y = tas_mean)) +
  geom_point() +
  scale_shape_manual(values = 0:35) +
  geom_errorbar(aes(ymin = tas_mean - tas_sd, ymax = tas_mean + tas_sd), width = .2) +
  geom_line() +
  scale_y_continuous(labels = label_percent()) +
  facet_wrap(~ site, ncol = 5) +
  labs(x = "Year", y = "TA/Sediment Cover") +
  theme_bw()

### Bar plots

ggplot(benthic_cover_year %>%
         group_by(site_cat) %>%
         mutate(year = factor(year, levels = c("2017", "2019", "2025"))), 
       aes(x = year, y = lc_mean)) +
  geom_col(fill = "coral", color = "black", alpha = 0.8) +
  geom_errorbar(aes(ymin = lc_mean - lc_sd, ymax = lc_mean + lc_sd), width = .2) +
  scale_y_continuous(labels = label_percent()) +
  facet_wrap(~ site_cat, scales = "free_x") +
  labs(x = "Year", y = "Live Coral Cover") +
  theme_minimal()

Radar plots

NEMMA

max_pc <- benthic_cover_year %>%
  filter(site_cat == "NEMMA") %>%
  select(contains("_mean")) %>%
  unlist() %>%
  max(na.rm = TRUE)

ncol_pc <- ncol(benthic_cover_year %>%
                  filter(site_cat == "NEMMA") %>%
                  select(contains("_mean")))

benthic_rdr <- 
  rbind(rep(max_pc, ncol_pc), 
        rep(0, ncol_pc), 
        benthic_cover_year %>%
          filter(site_cat == "NEMMA") %>%
          column_to_rownames(var = "year") %>%
          select(contains("_mean")) %>%
          rename_with(~ gsub("_mean", "", .x))
        )
  
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)

radarchart(benthic_rdr,
           pcol = line_colors,
           pfcol = fill_colors,
           cglcol = "grey")

# doesn't like adding legend within rmarkdown - but can try within knit document
legend(
  x = "topright",
  legend = rownames(benthic_rdr)[3:4],  # skip max/min rows
  col = line_colors,
  lty = 1,
  lwd = 2,
  bty = "n",                   # no box
  cex = 0.8,
  pt.cex = 1.5,
  fill = fill_colors            # show fill colors in legend
)

NDNP

max_pc <- benthic_cover_year %>%
  filter(site_cat == "NDNP") %>%
  select(contains("_mean")) %>%
  unlist() %>%
  max(na.rm = TRUE)

ncol_pc <- ncol(benthic_cover_year %>%
                  filter(site_cat == "NDNP") %>%
                  select(contains("_mean")))


benthic_rdr <- 
  rbind(rep(max_pc, ncol_pc), 
        rep(0, ncol_pc), 
        benthic_cover_year %>%
          filter(site_cat == "NDNP") %>%
          column_to_rownames(var = "year") %>%
          select(contains("_mean")) %>%
          rename_with(~ gsub("_mean", "", .x))
        )
  
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)

radarchart(benthic_rdr,
           pcol = line_colors,
           pfcol = fill_colors,
           cglcol = "grey")

# doesn't like adding legend within rmarkdown - but can try within knit document
legend(
  x = "topright",
  legend = rownames(benthic_rdr)[3:4],  # skip max/min rows
  col = line_colors,
  lty = 1,
  lwd = 2,
  bty = "n",                   # no box
  cex = 0.8,
  pt.cex = 1.5,
  fill = fill_colors            # show fill colors in legend
)

NMDS

NEMMA

benthic_nmds <- benthic_cover_site %>%
  filter(site_cat == "NEMMA") %>%
  select(!site_cat) %>%
  select(!contains("_se")) %>%
  rename_with(~ gsub("_mean", "", .x)) %>%
  mutate(year = as.character(year))

# Separate metadata and community data
benthic_meta <- benthic_nmds %>% ungroup %>% select(site, year)
benthic_pc   <- benthic_nmds %>% ungroup %>% select(-site, -year)

# Run NMDS
set.seed(123) # fixes random number generator so results are reproducible
benthic_nmds_ord <- metaMDS(benthic_pc, distance = "bray", k = 2, trymax = 100, trace = 0)

# Extract NMDS scores and add metadata
site_scores <- scores(benthic_nmds_ord, display = "sites") %>%
  as.data.frame() %>%
  mutate(site = benthic_meta$site,
         year = benthic_meta$year)

# Extract pc (community variable) scores and add metadata
pc_scores <- scores(benthic_nmds_ord, display = "species") %>%
  as.data.frame() %>%
  rownames_to_column(var = "category")

# Plot NMDS

## defining NMDS arrows
arrow_mult <- 2
pc_scores_scaled <- pc_scores %>%
  mutate(NMDS1 = NMDS1 * arrow_mult,
         NMDS2 = NMDS2 * arrow_mult)

## adding offset columns to move arrow labels
pc_scores_scaled <- pc_scores_scaled %>%
  mutate(
    label_nudge_x = ifelse(NMDS1 >= 0, 0.05, -0.05),
    label_nudge_y = ifelse(NMDS2 >= 0, 0.05, -0.05)
  )

## plot
ggplot(site_scores, aes(x = NMDS1, y = NMDS2, color = year, shape = site)) +
  geom_point(size = 4) +
  scale_shape_manual(values = 0:25) +
  stat_ellipse(aes(group = year), type = "t", linetype = 2) +
  geom_segment(data = pc_scores_scaled,
               aes(x = 0, y = 0, xend = NMDS1, yend = NMDS2),
               arrow = arrow(length = unit(0.3, "cm")),
               color = "black",
               inherit.aes = FALSE) +
  geom_text(data = pc_scores_scaled,
            aes(x = NMDS1 + label_nudge_x,
                y = NMDS2 + label_nudge_y,
                label = category),
            color = "black",
            inherit.aes = FALSE) +
  theme_minimal()

# PERMANOVA (adonis2)
set.seed(123)
adonis_res <- adonis2(benthic_pc ~ year, 
                      data = benthic_meta, 
                      method = "bray", 
                      permutations = 999)

print(adonis_res)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = benthic_pc ~ year, data = benthic_meta, permutations = 999, method = "bray")
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1  0.27214 0.26181 4.9652  0.001 ***
## Residual 14  0.76733 0.73819                  
## Total    15  1.03948 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

NDNP

benthic_nmds <- benthic_cover_site %>%
  filter(site_cat == "NDNP") %>%
  select(!site_cat) %>%
  select(!contains("_se")) %>%
  rename_with(~ gsub("_mean", "", .x)) %>%
  mutate(year = as.character(year))

# Separate metadata and community data
benthic_meta <- benthic_nmds %>% ungroup %>% select(site, year)
benthic_pc   <- benthic_nmds %>% ungroup %>% select(-site, -year)

# Run NMDS
set.seed(123) # fixes random number generator so results are reproducible
benthic_nmds_ord <- metaMDS(benthic_pc, distance = "bray", k = 2, trymax = 100, trace = 0)

# Extract NMDS scores and add metadata
site_scores <- scores(benthic_nmds_ord, display = "sites") %>%
  as.data.frame() %>%
  mutate(site = benthic_meta$site,
         year = benthic_meta$year)

# Extract pc (community variable) scores and add metadata
pc_scores <- scores(benthic_nmds_ord, display = "species") %>%
  as.data.frame() %>%
  rownames_to_column(var = "category")

# Plot NMDS

## defining NMDS arrows
arrow_mult <- 2
pc_scores_scaled <- pc_scores %>%
  mutate(NMDS1 = NMDS1 * arrow_mult,
         NMDS2 = NMDS2 * arrow_mult)

## adding offset columns to move arrow labels
pc_scores_scaled <- pc_scores_scaled %>%
  mutate(
    label_nudge_x = ifelse(NMDS1 >= 0, 0.05, -0.05),
    label_nudge_y = ifelse(NMDS2 >= 0, 0.05, -0.05)
  )

## plot
ggplot(site_scores, aes(x = NMDS1, y = NMDS2, color = year, shape = site)) +
  geom_point(size = 4) +
  stat_ellipse(aes(group = year), type = "t", linetype = 2) +
  geom_segment(data = pc_scores_scaled,
               aes(x = 0, y = 0, xend = NMDS1, yend = NMDS2),
               arrow = arrow(length = unit(0.3, "cm")),
               color = "black",
               inherit.aes = FALSE) +
  geom_text(data = pc_scores_scaled,
            aes(x = NMDS1 + label_nudge_x,
                y = NMDS2 + label_nudge_y,
                label = category),
            color = "black",
            inherit.aes = FALSE) +
  theme_minimal()

# PERMANOVA (adonis2)
set.seed(123)
adonis_res <- adonis2(benthic_pc ~ year, 
                      data = benthic_meta, 
                      method = "bray", 
                      permutations = 999)

print(adonis_res)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = benthic_pc ~ year, data = benthic_meta, permutations = 999, method = "bray")
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1  0.39445 0.32306 12.408  0.001 ***
## Residual 26  0.82653 0.67694                  
## Total    27  1.22098 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Coral species composition

Data overview:

  • 2017: “CoralCoverSpeciesScaledBySite”
    • One sheet (‘Data’) with percent cover by species
  • 2025: BenthicCoralCoverScaledByTransect.xlsx”
    • Separate sheets per family give percent cover by species - inconvenient format, need to bind all together

Import + wrangle data

# metadata for species - family matching
coral_meta <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoralCoverScaled.xlsx"), sheet = "Metadata") %>% 
  clean_names() %>%
  fill(scaled_coral_cover, column_header) 

coral_meta_fam <- coral_meta %>%
  filter(str_starts(column_header, "t") | column_header == "UNKN") %>%
  select(family_code = column_header, family = definition) %>%
  mutate(family = gsub("\\s*\\([^\\)]+\\)", "", family)) %>% # removing fluff
  distinct() # removing UNKN duplicate

coral_meta_spp <- coral_meta %>%
  filter(str_starts(scaled_coral_cover, "t") | scaled_coral_cover == "UNKN") %>%
  select(species_code = column_header, family_code = scaled_coral_cover, species = definition) %>%
  left_join(coral_meta_fam, by = "family_code") %>%
  mutate(genus = sub(" .*", "", species))

# historical data
coral_spp_hist <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "CoralCoverSpeciesScaled", "CoralCoverSpeciesScaledBySite.xlsx"), sheet = "Data") %>% 
  rename_with(fix_names) %>%
  mutate(year = year(date)) %>%
  filter(year %in% c(2017, 2019)) %>% # 2022 was incomplete NPA year
  pivot_longer(
    cols = matches("^[a-z]{4}_(avg|std)$"),   # all columns like acer_ave, apal_std, etc.
    names_to = c("species_code", ".value"),        # split into species and measurement type
    names_sep = "_"
  ) %>%
  mutate(across(c(avg, std), ~ .x / 100)) %>% # to match 2025 format (fraction of 1)
  select(site, year, species_code, avg, std) 

# 2025 data
file_coral_nemma <- here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoralCoverScaled.xlsx")
file_coral_npa <- here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "BenthicCoralCoverScaled.xlsx")

clean_excel_coral <- function(file_path) {
  sheets <- excel_sheets(file_path)
  sheets_to_join <- setdiff(sheets, c("TermsOfUse", "Metadata", "Overall"))  # exclude
  
  sheets_list <- map(sheets_to_join, ~ read_excel(file_path, sheet = .x) %>%
                       rename_with(~ gsub("^(t)([A-Za-z]{4})(avg|med|std)$", "\\1_\\2_\\3", .x, ignore.case = TRUE)) %>%
                       rename_with(~ gsub("([A-Za-z]{4})(avg|med|std)$", "\\1_\\2", .x, ignore.case = TRUE)) %>%
                       clean_names())
  
  reduce(sheets_list, left_join, by = c("survey_id", "code", "name", "nt"))
}

coral_spp_2025 <- clean_excel_coral(file_coral_npa) %>%
  bind_rows(clean_excel_coral(file_coral_nemma)) %>%
  mutate(year = 2025) %>%
  rename(site = name) %>%
  pivot_longer(
    cols = matches("^[a-z]{4}_(avg|std)$"),   # all columns like acer_ave, apal_std, etc.
    names_to = c("species_code", ".value"),        # split into species and measurement type
    names_sep = "_"
  ) %>%
  select(site, year, species_code, avg, std) 

# merge for complete df
coral_spp_site <- bind_rows(coral_spp_hist, coral_spp_2025) %>%
  mutate(site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                                    "NEMMA")),
         site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site)) %>%
  mutate(species_code = toupper(species_code)) %>%
  left_join(coral_meta_spp, by = "species_code") %>%
  mutate(across(c(species, family), ~ str_replace(., "Unknown sp. of Live Coral", "Unknown spp.")))

coral_spp_year <- coral_spp_site %>%
  group_by(year, site_cat, family, genus, species_code, species) %>%
  summarize(mean = mean(avg), 
            sd = sd(avg)) %>%
  group_by(year, site_cat) %>%
  mutate(rel_mean = mean / sum(mean)) %>%
  group_by(year, site_cat, species) %>%
  filter(sum(mean) > 0) %>% # removing species that were 0 in all years/site_cats
  ungroup()

coral_fam_year <- coral_spp_year %>%
  group_by(year, site_cat, family) %>%
  summarize(mean = sum(mean)) %>%
  group_by(year, site_cat) %>%
  mutate(rel_mean = mean/sum(mean)) %>%
  group_by(year, site_cat) %>%
  complete(
    family = unique(coral_spp_year$family),
    fill = list(mean = 0, rel_mean = 0)
  ) %>%
  ungroup()

coral_fam_year_wide <- coral_fam_year %>%
  pivot_wider(names_from = family,
              values_from = c(mean, rel_mean),
              names_glue = "{family}_{.value}")

Line plots

chk <- coral_fam_year %>%
  group_by(year, site_cat) %>%
  summarize(tot = sum(rel_mean))

ggplot(coral_fam_year, aes(x = year, y = mean, group = site_cat, color = site_cat)) +
  geom_point() +
  geom_line() + 
  scale_y_continuous(labels = percent_format()) +
  labs(x = "Year", y = "Mean percent cover", color = "Location") +
  facet_wrap(~ family) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplot(coral_fam_year, aes(x = year, y = rel_mean, group = site_cat, color = site_cat)) +
  geom_point() +
  geom_line() + 
  scale_y_continuous(labels = percent_format()) +
  labs(x = "Year", y = "Relative proportion", color = "Location") +
  facet_wrap(~ family) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Radar plots

NEMMA

max_pc <- coral_fam_year_wide %>%
  filter(site_cat == "NEMMA") %>%
  select(contains("_mean") & 
           !contains("rel_mean") & 
           !contains("Unknown spp.") &
           where(~ any(. >= 0.001))) %>%
  unlist() %>%
  max(na.rm = TRUE)

ncol_pc <- ncol(coral_fam_year_wide %>%
                  filter(site_cat == "NEMMA") %>%
                  select(contains("_mean") & 
                           !contains("rel_mean") & 
                           !contains("Unknown spp.") &
                           where(~ any(. >= 0.001)))
)

coral_rdr <- 
  rbind(rep(max_pc, ncol_pc), 
        rep(0, ncol_pc), 
        coral_fam_year_wide %>%
          filter(site_cat == "NEMMA") %>%
          column_to_rownames(var = "year") %>%
          select(contains("_mean") & 
                   !contains("rel_mean") & 
                   !contains("Unknown spp.") &
                   where(~ any(. >= 0.001))) %>%
          rename_with(~ gsub("_mean", "", .x))
        )
  
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)

radarchart(coral_rdr,
           pcol = line_colors,
           pfcol = fill_colors,
           cglcol = "grey")

# doesn't like adding legend within rmarkdown - but can try within knit document
legend(
  x = "topright",
  legend = rownames(coral_rdr)[3:4],  # skip max/min rows
  col = line_colors,
  lty = 1,
  lwd = 2,
  bty = "n",                   # no box
  cex = 0.8,
  pt.cex = 1.5,
  fill = fill_colors            # show fill colors in legend
)

NDNP

max_pc <- coral_fam_year_wide %>%
  filter(site_cat == "NDNP") %>%
  select(contains("_mean") & 
           !contains("rel_mean") & 
           !contains("Unknown spp.") &
           where(~ any(. >= 0.001))) %>%
  unlist() %>%
  max(na.rm = TRUE)

ncol_pc <- ncol(coral_fam_year_wide %>%
                  filter(site_cat == "NDNP") %>%
                  select(contains("_mean") & 
                           !contains("rel_mean") & 
                           !contains("Unknown spp.") &
                           where(~ any(. >= 0.001)))
)

coral_rdr <- 
  rbind(rep(max_pc, ncol_pc), 
        rep(0, ncol_pc), 
        coral_fam_year_wide %>%
          filter(site_cat == "NDNP") %>%
          column_to_rownames(var = "year") %>%
          select(contains("_mean") & 
                   !contains("rel_mean") & 
                   !contains("Unknown spp.") &
                   where(~ any(. >= 0.001))) %>%
          rename_with(~ gsub("_mean", "", .x))
        )
  
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)

radarchart(coral_rdr,
           pcol = line_colors,
           pfcol = fill_colors,
           cglcol = "grey")

# doesn't like adding legend within rmarkdown - but can try within knit document
legend(
  x = "topright",
  legend = rownames(coral_rdr)[3:4],  # skip max/min rows
  col = line_colors,
  lty = 1,
  lwd = 2,
  bty = "n",                   # no box
  cex = 0.8,
  pt.cex = 1.5,
  fill = fill_colors            # show fill colors in legend
)

Fish

Data overview:

Notes + questions:

  • To include commercially significant species, would have to go back to raw 2025 data or do custom calculations by family

Biomass

Data overview:

  • 2017: FishBiomassByTransect.xlsx
    • “Data” sheet has transect-level data by family and group
  • 2025: FishBiomassByTransect.xlsx
    • “Overall” sheet has transect-level data by family
    • Family sheets have transect-level data by species

Import + wrangle data

# create compatible family key by merging metadata
fish_fam_hist_temp <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishBiomass", "FishBiomassBySite.xlsx"), sheet = "Key") %>% 
  clean_names() %>%
  slice(21:40) %>%
  select(code_com = 1,
         definition = 2) %>%
  mutate(family = str_remove(word(definition, 1), ":"),
         family = if_else(family == "Wrasse", "Wrasses", 
                          if_else(code_com == "PUFF", "Pufferfishes", family))) %>%
  select(-definition)

fish_fam_2025_temp <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishBiomass.xlsx"), sheet = "Metadata") %>% 
  clean_names() %>%
  slice(23:42) %>%
  select(code_lat = 2,
         definition = 3) %>%
  mutate(family = str_remove(word(definition, 1), ":")) %>%
  select(-definition)

fish_fam_key <- full_join(fish_fam_hist_temp, fish_fam_2025_temp, by = "family") %>%
  mutate(code_lat = tolower(gsub("^t", "", code_lat)),
         code_com = tolower(code_com)) %>%
  select(code_com, code_lat, family) %>%
  bind_rows(
    tibble(code_com = "t",
           code_lat = "all",
           family   = "Total"))

# import data
fish_bm_hist_temp <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishBiomass", "FishBiomassBySite.xlsx"), sheet = "Data") %>% 
  rename_with(fix_names) %>%
  mutate(year = year(date)) %>%
  filter(nt >= 8) %>% # keeping only sites with 8 or more transects
  select(site, year, contains("avg"), contains("std")) %>%
  pivot_longer(
    cols = -c(site, year),  
    names_to = "family_stat",      
    values_to = "value"                 
  ) %>%
  separate(family_stat, into = c("code_com", "stat"), sep = "_(?=[^_]+$)") %>%
  left_join(fish_fam_key, by = "code_com") %>%
  select(site, year, code = code_lat, family, stat, value)

fish_bm_2025_temp <- rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishBiomass.xlsx"), sheet = "Overall"),
                      read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishBiomass.xlsx"), sheet = "Overall")) %>%
  # deal with weird capitalization in species col names
  rename_with(fix_names) %>%
  filter(nt >= 8) %>% # keeping only sites with 8 or more transects
  mutate(year = 2025,
         site = if_else(name == "Little Bird Patch", "Little Bird Backreef", name)) %>%
  select(site, year, contains("avg"), contains("std")) %>%
  rename_with(~ gsub("^t_", "", .x), .cols = -c(site, year)) %>%
  pivot_longer(
    cols = -c(site, year),  
    names_to = "family_stat",      
    values_to = "value"                 
  ) %>%
  separate(family_stat, into = c("code_lat", "stat"), sep = "_(?=[^_]+$)") %>%
  left_join(fish_fam_key, by = "code_lat") %>%
  select(site, year, code = code_lat, family, stat, value)

# merge datasets
fish_bm_site_temp <- rbind(fish_bm_hist_temp, fish_bm_2025_temp) %>%
 mutate(site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site), # dealing with NPA site inconsistencies
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                            "NEMMA"))) %>%
  mutate(year = if_else(year == 2024, 2025, year)) %>% # one erroneous 2024 date
  filter(year != 2022,
         !is.na(family)) %>% # 2022 was an incomplete data collection year for NPA
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # keeping only sites with over time comparisons
  ungroup()

herb_bm_site_temp <- fish_bm_site_temp %>%
  filter(code %in% c("scar", "acan")) %>%
  group_by(site, site_cat, year, stat) %>%
  summarise(
    value = if_else(stat == "avg", sum(value, na.rm = TRUE), 
                    sqrt(sum(value^2, na.rm = TRUE))), # for std
    .groups = "drop"
  ) %>%
  mutate(family = "Herbivores", code = "herb") %>%
  distinct()

fish_bm_site <- rbind(fish_bm_site_temp, herb_bm_site_temp) %>%
  pivot_wider(
    names_from = stat,   # use 'avg' and 'std' as new column names
    values_from = value  # fill those columns with the numeric values
  ) %>%
  unnest(c(avg, std)) # make numeric instead of list
# wide version in benthic ~ fish section

fish_bm_year <- fish_bm_site %>%
  group_by(year, site_cat, family, code) %>%
  summarize(mean = mean(avg)) %>%
  group_by(year, site_cat) %>%
  mutate(rel_mean = mean/sum(mean)) %>%
  group_by(year, site_cat) %>%
  complete(
    family = unique(fish_bm_site$family),
    fill = list(mean = 0, rel_mean = 0)
  ) %>%
  ungroup()

# wide format for graphing purposes
fish_bm_year_wide <- fish_bm_year %>%
  select(-code) %>%
  pivot_wider(names_from = family,
              values_from = c(mean, rel_mean),
              names_glue = "{family}_{.value}")

# clean environment and export
# rm(list = ls(pattern = "_temp$"))
write.csv(fish_bm_year, here("agrra_monitoring", "data_outputs", "fish_bm_year.csv"))

Line plots

ggplot(fish_bm_site %>% filter(code == "all"), 
       aes(x = year, y = avg, group = site, color = site)) +
  geom_point() +
  geom_line() +
  labs(x = "Year", y = "Total fish biomass (g/100m2)", color = "Site", group = "Site") +
  theme_minimal()

ggplot(fish_bm_site %>% filter(code == "scar"), 
       aes(x = year, y = avg, group = site, color = site)) +
  geom_point() +
  geom_line() +
  labs(x = "Year", y = "Scarid biomass (g/100m2)", color = "Site", group = "Site") +
  theme_minimal()

ggplot(fish_bm_site %>% filter(code == "acan"), 
       aes(x = year, y = avg, group = site, color = site)) +
  geom_point() +
  geom_line() +
  labs(x = "Year", y = "Acanthurid biomass (g/100m2)", color = "Site", group = "Site") +
  theme_minimal()

ggplot(fish_bm_site %>% filter(code == "serr"), 
       aes(x = year, y = avg, group = site, color = site)) +
  geom_point() +
  geom_line() +
  labs(x = "Year", y = "Serranid biomass (g/100m2)", color = "Site", group = "Site") +
  theme_minimal()

Radar plots

NEMMA

data <- fish_bm_year_wide %>%
  filter(site_cat == "NEMMA") %>%
  column_to_rownames(var = "year") %>%
  select(contains("_mean") & 
           !contains("rel_mean") & 
           !contains(c("Total", "Herbivores")) &
           where(~ any(. >= 100))) %>%
  rename_with(~ gsub("_mean", "", .x))

max <- data %>%
  unlist() %>%
  max(na.rm = TRUE)

ncol <- ncol(data)

rdr <- 
  rbind(rep(max, ncol), 
        rep(0, ncol), 
        data)
  
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)

radarchart(rdr,
           pcol = line_colors,
           pfcol = fill_colors,
           cglcol = "grey")

# doesn't like adding legend within rmarkdown - but can try within knit document
legend(
  x = "topright",
  legend = rownames(rdr)[3:4],  # skip max/min rows
  col = line_colors,
  lty = 1,
  lwd = 2,
  bty = "n",                   # no box
  cex = 0.8,
  pt.cex = 1.5,
  fill = fill_colors            # show fill colors in legend
)

NDNP

data <- fish_bm_year_wide %>%
  filter(site_cat == "NDNP") %>%
  column_to_rownames(var = "year") %>%
  select(contains("_mean") & 
           !contains("rel_mean") & 
           !contains(c("Total", "Herbivores")) &
           where(~ any(. >= 100))) %>% # at least one mean bm value must be over 100
  rename_with(~ gsub("_mean", "", .x))

max <- data %>%
  unlist() %>%
  max(na.rm = TRUE)

ncol <- ncol(data)

rdr <- 
  rbind(rep(max, ncol), 
        rep(0, ncol), 
        data)
  
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)

radarchart(rdr,
           pcol = line_colors,
           pfcol = fill_colors,
           cglcol = "grey")

legend(
  x = "topright",
  legend = rownames(rdr)[3:4],  # skip max/min rows
  col = line_colors,
  lty = 1,
  lwd = 2,
  bty = "n",                   # no box
  cex = 0.8,
  pt.cex = 1.5,
  fill = fill_colors            # show fill colors in legend
)

Length

Data overview:

  • 2017-2022: FishSizeFreqByTransect.xlsx (binned data only)
    • NF (number of fish) + raw numbers and % in each size bin
    • % is out of 100
    • Everything above 40cm gets aggregated
    • One sheet per family has transect-level size data
  • 2025: FishSizeFreqByTransect.xlsx (binned data only)
    • NF (number of fish) + % in each size bin
    • % is out of one (proportion)
    • Over 40cm, bins are by 10cm to the nearest 10cm until <200cm
    • “Overall” sheet has transect-level size data
    • Family sheets have data by family

Notes + questions:

  • Dealing with >40cm fish given aggregation into one group in historical data? Need to check raw data
  • Correct to exclude transects with no fish and include in mean size calculations?
  • Weighted means: calculate midpoint representation of each size bin and weight by the number of counts per bin

Scarids

Import + wrangle data

# starting with scarids only as mock-up
scar_length_trans <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishSizeFreq", "FishSizeFreqByTransect.xlsx"), sheet = "PARR") %>%
  clean_names() %>%
  filter(!is.na(site)) %>%
  mutate(year = year(date),
         across(
    .cols = contains("percent_"),
    .fns  = ~ .x / 100)) %>%
  select(site, year, transect = trans, nf, contains("percent_")) %>%
  rename_with(~ str_remove_all(.x, "percent_")) %>%
  rename("41_up" = "40") %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishSizeFreqByTransect.xlsx"), sheet = "tSCAR") %>%
          clean_names() %>%
          mutate(year = 2025,
                 survey_name = if_else(survey_name == "Little Bird Patch", "Little Bird Backreef", survey_name)) %>%
          select(site = survey_name, year, transect = transect_name, nf, starts_with("x")) %>%
          rename_with(~ str_remove_all(.x, "^(x)|(cm$)")) %>%
          rowwise() %>% 
          mutate("41_up" = sum(c_across(matches("^\\d+$")), na.rm = TRUE)) %>% # select only columns with digits (e.g., 50, 60) and sum across these to be consistent with 2017 data, should not affect most of our fish families of interest
          ungroup() %>%
          select(!matches("^\\d+$"))) %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishSizeFreqByTransect.xlsx"), sheet = "tSCAR") %>%
          clean_names() %>%
          mutate(year = 2025) %>%
          select(site = survey_name, year, transect = transect_name, nf, starts_with("x")) %>%
          rename_with(~ str_remove_all(.x, "^(x)|(cm$)")) %>%
          rowwise() %>% 
          mutate("41_up" = sum(c_across(matches("^\\d+$")), na.rm = TRUE)) %>%
          ungroup() %>%
          select(!matches("^\\d+$"))) %>%
  mutate(site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site), # dealing with NPA sites that have site numbers with different name variations afterwards
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                            "NEMMA"))) %>%
  filter(year != 2022) %>% # 2022 was an incomplete data collection year for NPA
  group_by(site, year) %>%
  filter(n_distinct(transect) >= 8) %>% # keeping only sites with at least 8 transects per year
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
  ungroup()

# check that all transects are present even if no fish per family were recorded - should have 0 entries if all is well
# chk <- full_join(scar_length_trans %>%
#                    group_by(year, site) %>%
#                    summarize(count = n()), 
#                  fish_bm_trans %>%
#                    group_by(year, site) %>%
#                    summarize(count = n()), 
#                  by = c("year", "site"), suffix = c("_length", "_bm")) %>%
#   filter(count_length != count_bm | is.na(count_length) | is.na(count_bm))

# transform data for summaries
scar_length_long <- scar_length_trans %>%
  filter(nf != 0) %>%
  pivot_longer(
    cols = matches("^\\d+_?\\d?"), 
    names_to = "length_mid", 
    values_to = "percent"
  ) %>%
  mutate(
    # convert bin names like "0_5" -> 2.5
    length_mid = str_extract(length_mid, "\\d+_?\\d*"),
    length_mid = ifelse(str_detect(length_mid, "_"),
                        sapply(str_split(length_mid, "_"), function(x) mean(as.numeric(x))),
                        as.numeric(length_mid))) %>%
  group_by(year, site, transect) %>%
  mutate(prop = percent / sum(percent)) %>% # accounts for small deviations where total proportions don't equal exactly 1
  ungroup() %>%
  select(-percent)

scar_length_site <- scar_length_long %>%
  group_by(year, site, site_cat, length_mid) %>%
  summarise(mean_prop = mean(prop), .groups = "drop")

scar_length_year <- scar_length_site %>%
  group_by(year, site_cat, length_mid) %>%
  summarise(mean_prop = mean(mean_prop), .groups = "drop")

Density plots

ggplot(scar_length_year %>% 
         mutate(year = as.character(year)), 
       aes(x = length_mid, weight = mean_prop, group = year, color = year, fill = year)) +
  geom_density(alpha = 0.3, adjust = 1) +
  facet_wrap(~ site_cat, ncol = 2) +
  labs(x = "Fish length (cm) - Scaridae", y = "Density") +
  theme_minimal()

Chi-square tests

# transform data for summaries
scar_length_long_binned <- scar_length_trans %>%
  filter(nf != 0) %>%
  pivot_longer(
    cols = matches("^\\d+_?\\d?"), 
    names_to = "size_bin", 
    values_to = "percent"
  ) %>%
  rename(nf_tot = nf) %>%
  mutate(nf_bin = nf_tot * percent)

scar_counts <- scar_length_long_binned %>%
  mutate(size_bin = case_when(
    size_bin %in% c("21_30", "31_40", "41_up") ~ "21_up",
    TRUE ~ size_bin)) %>%
  group_by(year, site_cat, size_bin) %>%
  summarise(count = sum(nf_bin), .groups = "drop")
NEMMA
nemma_table <- scar_counts %>%
  filter(site_cat == "NEMMA") %>%
  select(year, size_bin, count) %>%
  pivot_wider(names_from = year, values_from = count, values_fill = 0)

chisq_result_nemma <- chisq.test(as.matrix(nemma_table[,-1]))
chisq_result_nemma
## 
##  Pearson's Chi-squared test
## 
## data:  as.matrix(nemma_table[, -1])
## X-squared = 81.011, df = 3, p-value < 2.2e-16
NDNP
ndnp_table <- scar_counts %>%
  filter(site_cat == "NDNP") %>%
  select(year, size_bin, count) %>%
  pivot_wider(names_from = year, values_from = count, values_fill = 0)

chisq_result_ndnp <- chisq.test(as.matrix(ndnp_table[,-1]))
chisq_result_ndnp
## 
##  Pearson's Chi-squared test
## 
## data:  as.matrix(ndnp_table[, -1])
## X-squared = 15.239, df = 3, p-value = 0.001623

Mosaic plots

tab <- scar_counts %>%
  filter(site_cat == "NEMMA") %>%
  mutate(size_bin = factor(size_bin, levels = c("0_5","6_10","11_20","21_up"))) %>%
  arrange(size_bin) %>%
  pivot_wider(names_from = year, values_from = count, values_fill = 0) %>%
  select(-site_cat)

# convert to matrix, drop the size_bin column
mat <- as.matrix(tab %>% select(-size_bin))
row.names(mat) <- tab$size_bin

mosaicplot(mat, 
           color = TRUE,           # color each year
           main = paste("Fish size distribution by year —", "NEMMA"),
           xlab = "Size Bin",
           ylab = "Year",
           las = 1)                # rotate y-axis labels

tab <- scar_counts %>%
  filter(site_cat == "NDNP") %>%
  mutate(size_bin = factor(size_bin, levels = c("0_5","6_10","11_20","21_up"))) %>%
  arrange(size_bin) %>%
  pivot_wider(names_from = year, values_from = count, values_fill = 0) %>%
  select(-site_cat)

# convert to matrix, drop the size_bin column
mat <- as.matrix(tab %>% select(-size_bin))
row.names(mat) <- tab$size_bin

mosaicplot(mat, 
           color = TRUE,           # color each year
           main = paste("Fish size distribution by year —", "NDNP"),
           xlab = "Size Bin",
           ylab = "Year",
           las = 1)                # rotate y-axis labels

Phase changes

Currently only 2025 data, because appears that 2017/2019 have no phase data…?

# raw historical data

fish_tax_hist <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "FishTaxonomy", skip = 1, col_names = TRUE) %>% # deals with merged headers
  clean_names() %>%
  unite(species, genus, species, sep = " ", remove = TRUE)

fish_raw_hist <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-FishRaw.xlsx"), sheet = "Counts Raw") %>%
  clean_names() %>%
  left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "Transect") %>%
              clean_names() %>%
              rename(transect = id, date = surveyed) %>%
              left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "Survey") %>%
                          clean_names() %>%
                          select(survey = id, site = name))) %>%
  filter(!is.na(site)) %>%
  mutate(year = year(date),
         size_class = str_replace_all(size_class, "-", "_"),
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                                    "NEMMA")),
         # convert bin names like "0_5" -> 2.5
         length_mid = str_extract(size_class, "\\d+_?\\d*"),
         length_mid = ifelse(str_detect(length_mid, "_"),
                             sapply(str_split(length_mid, "_"), function(x) mean(as.numeric(x))),
                             as.numeric(length_mid))) %>%
  left_join(fish_tax_hist, by = c("taxonomy" = "id")) %>%
  select(year, site, site_cat, trans = transect, size_class, length_mid, common_name, common_family, family, species) # no terminal phase in this data?

# raw 2025 data

fish_tax_2025 <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Raw", "Metadata.xlsx"), sheet = "FishTaxonomy", skip = 1, col_names = TRUE) %>% # deals with merged headers
  clean_names() %>%
  unite(species, genus, species, sep = " ", remove = TRUE)
# could replace NA species with 'spp.' before uniting

chk <- fish_tax_hist %>% # make sure taxonomic lookups are consistent
  full_join(fish_tax_2025, by = "id", suffix = c("_hist", "_2025")) %>%
  mutate(match = species_hist == species_2025)

fish_raw_2025 <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Raw", "FishRaw.xlsx"), sheet = "Counts Raw") %>%
  clean_names() %>%
  left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Raw", "Metadata.xlsx"), sheet = "Transect") %>%
              clean_names() %>%
              select(transect = id, survey, date = surveyed) %>%
              left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Raw", "Metadata.xlsx"), sheet = "Survey") %>%
                          clean_names() %>%
                          select(survey = id, site = name))) %>%
  bind_rows(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Raw", "FishRaw.xlsx"), sheet = "Counts Raw") %>%
              clean_names() %>%
              left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Raw", "Metadata.xlsx"), sheet = "Transect") %>%
                          clean_names() %>%
                          select(transect = id, survey, date = surveyed) %>%
                          left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Raw", "Metadata.xlsx"), sheet = "Survey") %>%
                                      clean_names() %>%
                                      select(survey = id, site = name)))
  ) %>%
  clean_names() %>%
  mutate(year = year(date),
         size_class = str_replace_all(size_class, " - ", "_"),
         size_class = str_replace_all(size_class, "cm", ""),
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                                    "NEMMA")),
         # convert bin names like "0_5" -> 2.5
         length_mid = str_extract(size_class, "\\d+_?\\d*"),
         length_mid = ifelse(str_detect(length_mid, "_"),
                             sapply(str_split(length_mid, "_"), function(x) mean(as.numeric(x))),
                             as.numeric(length_mid))) %>%
  left_join(fish_tax_2025, by = c("taxonomy" = "id")) %>%
  select(year, site, site_cat, trans = transect, size_class, length_mid, common_name, common_family, family, species, terminal_phase)

# merge all years

# fish_raw <- bind_rows(fish_raw_hist, fish_raw_2025)

# scarids only

scarids <- fish_raw_2025 %>%
  filter(species %in% c("Sparisoma viride", "Sparisoma aurofrenatum", "Scarus vetula", "Scarus iseri")) %>%
  mutate(sex = case_when(terminal_phase == "No" ~ "Female",
                         terminal_phase == "Yes" ~ "Male"),
         size_class = factor(size_class, levels = c("0_5","6_10","11_20","21_30", "31_40", "41_50")))

ggplot(scarids, aes(x = length_mid, fill = sex)) +
  geom_histogram(stat = "count", alpha = 0.8, position = position_dodge()) +
  facet_wrap(vars(species), ncol = 4) +
  geom_vline(data = filter(scarids, species == "Sparisoma viride"), aes(xintercept = 18, color = "Minimum length at maturity"), linetype = "dashed") +
  geom_vline(data = filter(scarids, species == "Scarus vetula"), aes(xintercept = 19, color = "Minimum length at maturity"), linetype = "dashed") +
  geom_vline(data = filter(scarids, species == "Sparisoma aurofrenatum"), aes(xintercept = 15, color = "Minimum length at maturity"), linetype = "dashed") +
  geom_vline(data = filter(scarids, species == "Scarus iseri"), aes(xintercept = 19, color = "Minimum length at maturity"), linetype = "dashed") +
  scale_color_manual(values = c("black")) +
  scale_fill_manual(values=c("pink2", "turquoise3")) +
  theme_bw() +
  labs(x = "Fish length (cm)", y = "Number observed", fill="") +
  theme(strip.text = element_text(face = "italic"),
        legend.title = element_blank(),
        legend.position = "top",
        axis.text.x = element_text(angle = 45, hjust = 1))

Serranids

Import + wrangle data

# starting with scarids only as mock-up
serr_length_trans <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishSizeFreq", "FishSizeFreqByTransect.xlsx"), sheet = "GROU") %>%
  clean_names() %>%
  filter(!is.na(site)) %>%
  mutate(year = year(date),
         across(
    .cols = contains("percent_"),
    .fns  = ~ .x / 100)) %>%
  select(site, year, transect = trans, nf, contains("percent_")) %>%
  rename_with(~ str_remove_all(.x, "percent_")) %>%
  rename("41_up" = "40") %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishSizeFreqByTransect.xlsx"), sheet = "tSERR") %>%
          clean_names() %>%
          mutate(year = 2025,
                 survey_name = if_else(survey_name == "Little Bird Patch", "Little Bird Backreef", survey_name)) %>%
          select(site = survey_name, year, transect = transect_name, nf, starts_with("x")) %>%
          rename_with(~ str_remove_all(.x, "^(x)|(cm$)")) %>%
          rowwise() %>% 
          mutate("41_up" = sum(c_across(matches("^\\d+$")), na.rm = TRUE)) %>% # select only columns with digits (e.g., 50, 60) and sum across these to be consistent with 2017 data, should not affect most of our fish families of interest
          ungroup() %>%
          select(!matches("^\\d+$"))) %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishSizeFreqByTransect.xlsx"), sheet = "tSERR") %>%
          clean_names() %>%
          mutate(year = 2025) %>%
          select(site = survey_name, year, transect = transect_name, nf, starts_with("x")) %>%
          rename_with(~ str_remove_all(.x, "^(x)|(cm$)")) %>%
          rowwise() %>% 
          mutate("41_up" = sum(c_across(matches("^\\d+$")), na.rm = TRUE)) %>%
          ungroup() %>%
          select(!matches("^\\d+$"))) %>%
  mutate(site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site), # dealing with NPA sites that have site numbers with different name variations afterwards
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                            "NEMMA"))) %>%
  filter(year != 2022) %>% # 2022 was an incomplete data collection year for NPA
  group_by(site, year) %>%
  filter(n_distinct(transect) >= 8) %>% # keeping only sites with at least 8 transects per year
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
  ungroup()

# check that all transects are present even if no fish per family were recorded - should have 0 entries if all is well
# chk <- full_join(serr_length_trans %>%
#                    group_by(year, site) %>%
#                    summarize(count = n()), 
#                  fish_bm_trans %>%
#                    group_by(year, site) %>%
#                    summarize(count = n()), 
#                  by = c("year", "site"), suffix = c("_length", "_bm")) %>%
#   filter(count_length != count_bm | is.na(count_length) | is.na(count_bm))

# transform data for summaries
serr_length_long <- serr_length_trans %>%
  filter(nf != 0) %>%
  pivot_longer(
    cols = matches("^\\d+_?\\d?"), 
    names_to = "length_mid", 
    values_to = "percent"
  ) %>%
  mutate(
    # convert bin names like "0_5" -> 2.5
    length_mid = str_extract(length_mid, "\\d+_?\\d*"),
    length_mid = ifelse(str_detect(length_mid, "_"),
                        sapply(str_split(length_mid, "_"), function(x) mean(as.numeric(x))),
                        as.numeric(length_mid))) %>%
  group_by(year, site, transect) %>%
  mutate(prop = percent / sum(percent)) %>% # accounts for small deviations where total proportions don't equal exactly 1
  ungroup() %>%
  select(-percent)

serr_length_site <- serr_length_long %>%
  group_by(year, site, site_cat, length_mid) %>%
  summarise(mean_prop = mean(prop), .groups = "drop")

serr_length_year <- serr_length_site %>%
  group_by(year, site_cat, length_mid) %>%
  summarise(mean_prop = mean(mean_prop), .groups = "drop")

Density plots

ggplot(serr_length_year %>% 
         mutate(year = as.character(year)), 
       aes(x = length_mid, weight = mean_prop, group = year, color = year, fill = year)) +
  geom_density(alpha = 0.3, adjust = 1) +
  facet_wrap(~ site_cat, ncol = 2) +
  labs(x = "Fish length (cm) - Serranidae", y = "Density") +
  theme_minimal()

Benthic ~ Fish

Integrate datasets

benth_fish_site <- fish_bm_site %>%
  select(-family) %>%
  rename(mean = avg) %>%
  pivot_wider(names_from = code,
              values_from = c(mean, std),
              names_glue = "{code}_{.value}") %>%
  left_join(benthic_cover_site, by = c("site", "site_cat", "year"))

Exploratory models and plots

No statistically significant relationships found (including when scarid and acanthurids are aggregated)

mod <- lm(fma_mean ~ scar_mean, data = benth_fish_site)
summary(mod)
## 
## Call:
## lm(formula = fma_mean ~ scar_mean, data = benth_fish_site)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23116 -0.11016 -0.01080  0.08115  0.38401 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.083e-01  3.988e-02   7.730 3.71e-09 ***
## scar_mean   -3.646e-05  2.748e-05  -1.327    0.193    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1393 on 36 degrees of freedom
## Multiple R-squared:  0.04664,    Adjusted R-squared:  0.02016 
## F-statistic: 1.761 on 1 and 36 DF,  p-value: 0.1928
r2 <- summary(mod)$r.squared
pval <- summary(mod)$coefficients[2,4]

ggplot(benth_fish_site, aes(x = scar_mean, y = fma_mean)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "steelblue") +
  labs(x = "Scarid biomass", y = "FMA cover") +
  annotate("text", x = Inf, y = Inf,
           label = paste0("R² = ", round(r2, 2), 
                          "\nP = ", signif(pval, 2)),
           hjust = 1.1, vjust = 1.2, size = 4) +
  theme_bw()

mod <- lm(fma_mean ~ herb_mean, data = benth_fish_site)
summary(mod)
## 
## Call:
## lm(formula = fma_mean ~ herb_mean, data = benth_fish_site)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.20618 -0.10757 -0.01420  0.07757  0.41048 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.203e-01  4.543e-02   7.051 2.82e-08 ***
## herb_mean   -2.266e-05  1.607e-05  -1.410    0.167    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1389 on 36 degrees of freedom
## Multiple R-squared:  0.05235,    Adjusted R-squared:  0.02602 
## F-statistic: 1.989 on 1 and 36 DF,  p-value: 0.1671
r2 <- summary(mod)$r.squared
pval <- summary(mod)$coefficients[2,4]

ggplot(benth_fish_site, aes(x = herb_mean, y = fma_mean)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "steelblue") +
  labs(x = "Herbivore biomass", y = "FMA cover") +
  annotate("text", x = Inf, y = Inf,
           label = paste0("R² = ", round(r2, 2), 
                          "\nP = ", signif(pval, 2)),
           hjust = 1.1, vjust = 1.2, size = 4) +
  theme_bw()

mod <- lm(lc_mean ~ scar_mean, data = benth_fish_site)
summary(mod)
## 
## Call:
## lm(formula = lc_mean ~ scar_mean, data = benth_fish_site)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.05273 -0.02691 -0.01161  0.01497  0.15619 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.675e-02  1.209e-02   3.869 0.000442 ***
## scar_mean   6.717e-06  8.326e-06   0.807 0.425141    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04221 on 36 degrees of freedom
## Multiple R-squared:  0.01776,    Adjusted R-squared:  -0.009529 
## F-statistic: 0.6508 on 1 and 36 DF,  p-value: 0.4251
r2 <- summary(mod)$r.squared
pval <- summary(mod)$coefficients[2,4]

ggplot(benth_fish_site, aes(x = scar_mean, y = lc_mean)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "steelblue") +
  labs(x = "Scarid biomass", y = "Live coral cover") +
  annotate("text", x = Inf, y = Inf,
           label = paste0("R² = ", round(r2, 2), 
                          "\nP = ", signif(pval, 2)),
           hjust = 1.1, vjust = 1.2, size = 4) +
  theme_bw()

mod <- lm(lc_mean ~ herb_mean, data = benth_fish_site)
summary(mod)
## 
## Call:
## lm(formula = lc_mean ~ herb_mean, data = benth_fish_site)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.045407 -0.028903 -0.009291  0.014008  0.156349 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.741e-02  1.392e-02   4.124  0.00021 ***
## herb_mean   -1.069e-06  4.924e-06  -0.217  0.82931    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04256 on 36 degrees of freedom
## Multiple R-squared:  0.001308,   Adjusted R-squared:  -0.02643 
## F-statistic: 0.04716 on 1 and 36 DF,  p-value: 0.8293
r2 <- summary(mod)$r.squared
pval <- summary(mod)$coefficients[2,4]

ggplot(benth_fish_site, aes(x = herb_mean, y = lc_mean)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "steelblue") +
  labs(x = "Herbivore biomass", y = "Live coral cover") +
  annotate("text", x = Inf, y = Inf,
           label = paste0("R² = ", round(r2, 2), 
                          "\nP = ", signif(pval, 2)),
           hjust = 1.1, vjust = 1.2, size = 4) +
  theme_bw()

Correlation tests

Testing correlation between scarid biomass and benthic variables - nothing is notably strong

benth_vars <- benth_fish_site %>%
  select(lc_mean, cca_mean, ta_mean, tas_mean, fma_mean, cyan_mean, ainv_mean, peys_mean)

cor_results <- cor(benth_vars, benth_fish_site$scar_mean, use = "pairwise.complete.obs")
cor_results
##                  [,1]
## lc_mean    0.13324977
## cca_mean   0.38089582
## ta_mean    0.21085969
## tas_mean  -0.18002397
## fma_mean  -0.21595995
## cyan_mean  0.25669404
## ainv_mean  0.36476851
## peys_mean -0.01637593

Multivariate tests (PERMANOVA)

benth_mat <- benth_fish_site %>%
  select(lc_mean, cca_mean, ta_mean, tas_mean, fma_mean)

adonis2(benth_mat ~ scar_mean + site_cat + year,
        data = benth_fish_site,
        method = "bray",
        permutations = 999,
        by = "term")
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = benth_mat ~ scar_mean + site_cat + year, data = benth_fish_site, permutations = 999, method = "bray", by = "term")
##           Df SumOfSqs      R2       F Pr(>F)    
## scar_mean  1  0.15087 0.05318  5.3525  0.008 ** 
## site_cat   1  1.13209 0.39906 40.1633  0.001 ***
## year       1  0.59559 0.20994 21.1298  0.001 ***
## Residual  34  0.95837 0.33782                   
## Total     37  2.83693 1.00000                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
res <- adonis2(benth_mat ~ scar_mean + site_cat + year,
               data = benth_fish_site,
               method = "bray",
               permutations = 999,
               by = "term")

res_df <- broom::tidy(res)

ggplot(res_df, aes(x = reorder(term, R2), y = R2)) +
  geom_col(fill = "skyblue") +
  coord_flip() +
  labs(x = "Predictor", y = "Proportion of variation explained (R²)") +
  theme_bw()

Archive

access_meta_trans <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "Transect") %>%
  clean_names() %>% 
  left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "Survey") %>%
              clean_names() %>%
              rename(survey = id)) %>%
  select(transect = id, date = surveyed, site = name)
  


# checking percent that are unknown
chk <- coral_spp_hist %>%
  mutate(category = if_else(species == "unkn", "unknown", "known")) %>%
  group_by(site, year, category) %>%
  summarize(cover = sum(avg))

# old biomass import code - holding for now
fish_bm_trans <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishBiomass", "FishBiomassByTransect.xlsx"), sheet = "Data") %>% 
  clean_names() %>%
  filter(!is.na(site)) %>%
  mutate(year = year(date)) %>% 
  select(site, year, transect = trans, t_tot = t, t_scar = parr, t_acan = surg, t_serr = grou) %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishBiomassByTransect.xlsx"), sheet = "Overall") %>% 
          clean_names() %>%
          mutate(year = year(surveyed),
                 survey_name = if_else(survey_name == "Little Bird Patch", "Little Bird Backreef", survey_name)) %>% # inconsistency in site name between 2017 and 2025
          select(site = survey_name, year, transect = transect_name, t_tot = all, t_scar, t_acan, t_serr)) %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishBiomassByTransect.xlsx"), sheet = "Overall") %>% 
          clean_names() %>%
          mutate(year = year(surveyed)) %>%
          select(site = survey_name, year, transect = transect_name, t_tot = all, t_scar, t_acan, t_serr)) %>%
  mutate(t_herb = t_scar + t_acan,
         site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site), # dealing with NPA sites that have site numbers with different name variations afterwards
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                            "NEMMA"))) %>%
  mutate(year = if_else(year == 2024, 2025, year)) %>% # Site 11 had one erroneous 2024 date
  filter(year != 2022) %>% # 2022 was an incomplete data collection year for NPA
  group_by(site, year) %>%
  filter(n_distinct(transect) >= 8) %>% # keeping only sites with at least 8 transects per year
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # keeping only sites with over time comparisons
  ungroup()

fish_bm_site <- fish_bm_trans %>%
  group_by(site, site_cat, year) %>%
  rename_with(~ str_remove(.x, "t_")) %>%
  summarise(across(c(tot, herb, scar, acan, serr),
                   list(mean = ~ mean(.x, na.rm = TRUE),
                        se   = ~ se(.x)),
                   .names = "{.col}_{.fn}")) %>%
  ungroup()

fish_bm_year <- fish_bm_site %>%
  select(!contains("_se")) %>%
  rename_with(~ str_remove(.x, "_mean")) %>%
  group_by(site_cat, year) %>%
  summarise(across(c(tot, herb, scar, acan, serr),
                   list(mean = ~ mean(.x, na.rm = TRUE),
                        se   = ~ se(.x)),
                   .names = "{.col}_{.fn}")) %>%
  ungroup()

# importing and merging coral spp sheets for 2025
sheets <- excel_sheets(file_coral)
sheets_to_join <- setdiff(sheets, c("TermsOfUse", "Metadata", "Overall")) # exclude two sheets before merging remainder
sheets_list <- map(sheets_to_join, ~ read_excel(file_coral, sheet = .x) %>%
                     rename_with(~ gsub("([A-Za-z]{4})(avg|med|std)$", "\\1_\\2", .x)) %>% 
                     rename_with(~ gsub("^(t)([A-Za-z]{4})(ave|med|std)$", "\\1_\\2_\\3", .x)) %>% 
                     clean_names())

coral_spp_2025 <- reduce(sheets_list, left_join, by = c("survey_id", "code", "name", "nt"))


# benthic ~ fish by year
benth_fish_year <- benth_fish_site %>%
  select(-contains("_se")) %>%
  rename_with(~ str_remove(.x, "_mean$")) %>%
  group_by(year, site_cat) %>%
  summarise(
    across(
      where(is.numeric),
      list(mean = ~mean(.x, na.rm = TRUE),
           se   = ~se(.x)),
      .names = "{.col}_{.fn}"
    ),
    .groups = "drop"
  )